library(SummarizedExperiment)
library(dplyr)
library(ggplot2)
library(knitr)
library(DT)
library(limma)
library(EnhancedVolcano)
library(fgsea)
library(msigdbr)
library(gage)
library(pathview)
library(UpSetR)
Load the Summarized Experiment data object, extract clinical information, log-transformed TPM (Transcripts Per Million) expression data, along with annotations. Prepare the gene expression data for analysis.
# Load your SE result and extract clinical data , expression data and annotation
# Load SE obj
se <- readRDS("~/BHK lab/kevin Project/Cholangiocarcinoma/output data/SE_CCA.rds")
# Extract Clinical data
clin <- data.frame(colData(se)) #dim is 33 x 11
# Extract the expression data
expr_tpm <- assays(se)[["expr_tpm"]] #dim 38040 x 33
# Extract the annotation
annot <- data.frame(rowData(se)) #dim is 38040 x 11
# Display first few rows of the dataset
DT::datatable(round(expr_tpm[1:8, 1:7], 3))
Preparing expression data (RNA-seq) for analysis:
Total 16,256 genes remain for downstream analyses.
# Step 1: Restrict to Protein-Coding Genes.
annot_proteincoding <- annot[annot$gene_type == "protein_coding",] # 19205 protein coding genes.
expr_tpm<- expr_tpm[rownames(expr_tpm) %in% rownames(annot_proteincoding),]
dim(expr_tpm)
## [1] 19205 33
# Step 2: Filter Low/Zero Expression Genes]
#data is log2(TPM+1)
r <- as.numeric(apply(expr_tpm, 1, function(i) sum(round(i, 6) == round(log2(1), 6))))
# Get the indices of rows to remove
remove <- which(r > dim(expr_tpm)[2] * 0.5)
# Remove rows from the matrix
expr_tpm <- expr_tpm[-remove, ] #dim is 16256 x 33
DT::datatable(round(expr_tpm[1:8, 1:8], 3))
Continuous Variables (e.g., Age): Calculated the median and IQR and assessed associations using the Wilcoxon test.
Discrete/Categorical Variables (e.g., Sex, Stage, Location): Determined frequency and percentage distributions and assessed associations using Fisher’s exact test.
# Set 'y' and 'n' to 'Autoimmune' and 'Non-Autoimmune' within the dataframe
clin$psc_ibd[clin$psc_ibd == 'y'] <- 'Autoimmune'
clin$psc_ibd[clin$psc_ibd == 'n'] <- 'Non-Autoimmune'
# 1. Age
# Summarize age statistics (count,Mean(SD), Median(IQR), Min,Max) for each psc_ibd group.
summary_stats <- summarise(
group_by(clin, psc_ibd),
count = n(),
Mean_SD = sprintf("%.2f(%.2f)", mean(age, na.rm = TRUE), sd(age, na.rm = TRUE)),
Median_IQR = sprintf("%.2f(%.2f)", median(age, na.rm = TRUE), IQR(age, na.rm = TRUE)),
Min_Max = sprintf("%.2f,%.2f", min(age, na.rm = TRUE), max(age, na.rm = TRUE))
)
# Store statistics for Auto and Non-Auto for later use
auto_count <- summary_stats$count[summary_stats$psc_ibd == "Autoimmune"]
non_auto_count <- summary_stats$count[summary_stats$psc_ibd == "Non-Autoimmune"]
# Visualize your data.
# Plotting boxplot for age distributions
ggplot(clin, aes(x = psc_ibd, y = age, color = psc_ibd)) +
geom_boxplot() +
scale_color_manual(values = c("Autoimmune" = "#00AFBB", "Non-Autoimmune" = "#E7B800")) +
labs(y = "Age", x = "PSC-IBD")
# Perform two-sample Wilcoxon test
res <- wilcox.test(age ~ psc_ibd, data = clin, exact = FALSE)
p_value_age <- round(res$p.value, 2)
# 2. Sex
# Perform Fisher's Exact Test for 'sex' and 'psc_ibd'
contingency_sex <- table(clin$sex, clin$psc_ibd)
p_value_sex <- round(fisher.test(contingency_sex)$p.value,2)
# Calculate and format percentages for Sex
sex_stats <- apply(contingency_sex, 2, function(x) sprintf("%d (%.1f%%)", x, 100 * x / sum(x)))
# Perform Fisher's Exact Test for 'location' and 'psc_ibd'
contingency_loc <- table(clin$location, clin$psc_ibd)
p_value_loc <- round(fisher.test(contingency_loc)$p.value, 2)
# Calculate and format percentages for Location
location_stats <- apply(contingency_loc, 2, function(x) sprintf("%d (%.1f%%)", x, 100 * x / sum(x)))
# Perform Fisher's Exact Test for 'stage' and 'psc_ibd'
# Convert "IIIA", "IIIB", "IIIC" to "III and convert "IVA", "IVB" to "IV".
clin$stage <- with(clin, ifelse(stage %in% c("IIIA", "IIIB", "IIIC"), "III",
ifelse(stage %in% c("IVA", "IVB"), "IV", stage)))
contingency_stage <- table(clin$stage, clin$psc_ibd)
p_value_stage <- round(fisher.test(contingency_stage)$p.value, 2)
# Calculate and format percentages for Stage
stage_stats <- apply(contingency_stage, 2, function(x) sprintf("%d (%.1f%%)", x, 100 * x / sum(x)))
# Combine all statistics into a data frame for the table
result <- data.frame(
Characteristics = c("Age", "Mean (SD)", "Median (IQR)", "Min, Max", "",
"Sex", "F", "M", "","Location", "dCCA", "GBC", "iCCA",
"pCCA", "", "Stage", "II", "III", "IV"),
`Autoimmune (n=14)` = c("", summary_stats$Mean_SD[1], summary_stats$Median_IQR[1], summary_stats$Min_Max[1],
"","",sex_stats[1, 1], sex_stats[2, 1], "", "", location_stats[1, 1],
location_stats[2, 1], location_stats[3, 1], location_stats[4, 1],"","",
stage_stats[1, 1], stage_stats[2, 1], stage_stats[3, 1]),
`Non-Autoimmune (n=19)` = c("", summary_stats$Mean_SD[2], summary_stats$Median_IQR[2],summary_stats$Min_Max[2],
"","", sex_stats[1, 2], sex_stats[2, 2], "", "", location_stats[1, 2],
location_stats[2, 2],location_stats[3, 2], location_stats[4, 2],"","",
stage_stats[1, 2], stage_stats[2, 2],stage_stats[3, 2]),
`P value` = c(p_value_age, "", "", "", "",
p_value_sex, "", "", "", p_value_loc,
"", "", "", "", "",p_value_stage, "", "", ""),
check.names = FALSE
)
table_html <- kable(
result,
format = "html",
caption =" Table 1: Clinicodemographic and Staging Characteristics at Baseline (AJCC 8th Edition)",
escape = FALSE,
table.attr = 'style="width:100%; overflow-x:scroll; overflow-y:scroll; max-height:400px;"'
)
table_html
| Characteristics | Autoimmune (n=14) | Non-Autoimmune (n=19) | P value |
|---|---|---|---|
| Age | 0.01 | ||
| Mean (SD) | 43.57(10.68) | 54.68(10.91) | |
| Median (IQR) | 41.50(12.25) | 52.00(16.00) | |
| Min, Max | 26.00,66.00 | 41.00,74.00 | |
| Sex | 0.28 | ||
| F | 3 (21.4%) | 8 (42.1%) | |
| M | 11 (78.6%) | 11 (57.9%) | |
| Location | 0.02 | ||
| dCCA | 0 (0.0%) | 1 (5.3%) | |
| GBC | 1 (7.1%) | 3 (15.8%) | |
| iCCA | 8 (57.1%) | 15 (78.9%) | |
| pCCA | 5 (35.7%) | 0 (0.0%) | |
| Stage | 0.52 | ||
| II | 5 (35.7%) | 8 (42.1%) | |
| III | 4 (28.6%) | 2 (10.5%) | |
| IV | 5 (35.7%) | 9 (47.4%) |
# Limma approach applied.
design <- model.matrix(~ clin$psc_ibd)
fit <- lmFit(expr_tpm, design)
fit <- eBayes(fit)
# Sort by Pvalue.
top.table <- topTable(fit, sort.by = "P", n = Inf)
# To see How many DE gene are there.
length(which(top.table$adj.P.Val < 0.05)) # 460 DE genes.
## [1] 460
# Preview
DT::datatable(data.frame(round(data.frame(top.table),3)))
# Save top table to .txt file.
top.table$Gene <- rownames(top.table)
write.table(top.table, file = "~/BHK lab/kevin Project/Cholangiocarcinoma/output data/Limma_Fit.txt", row.names = F, sep = "\t", quote = F)
# 1. Volcano plot.
# 1.1 Volcano Plot based on P value
EnhancedVolcano(top.table,
lab = rownames(top.table),
x = 'logFC',
y = 'P.Value',
pCutoff = 0.05,
FCcutoff = 1.5,
xlim = c(-5, 5),
ylim = c(0, -log10(10e-4)),
title= 'Volcano Plot based on P value'
)
# 1.2 Volcano Plot based on FDR
# adjusted p-values as red
EnhancedVolcano(top.table,
lab = rownames(top.table),
x = 'logFC',
y = 'adj.P.Val',
pCutoff = 0.05,
FCcutoff = 1.5,
xlim = c(-5, 5),
ylim = c(0, -log10(10e-4)),
legendLabels=c('Not sig.','Log FC','adj p-value',
'adj p-value & Log FC'),
title= 'Volcano Plot based on FDR',)
# Load human hallmark gene sets from MSigDB
pathwaysDF <- msigdbr("Homo sapiens", category="H")
# Create a list of gene symbols for each pathway
pathwaysH <- split(as.character(pathwaysDF$gene_symbol), pathwaysDF$gs_name)
# 1. Create ranks
# Create 'ranks' vector from 'top.table' with logFC as values and gene names as names.
ranks <- top.table$logFC
names(ranks) <- top.table$Gene
ranks <- sort(ranks, decreasing = T)
# Preview the data table of ranks.
DT::datatable(data.frame(ranks))
# 2. Bar Plot
# Display the ranked fold changes.
barplot(ranks)
# 3. Conduct analysis
# Run fgsea with 'pathwaysH' and 'ranks'.
fgseaRes <- fgsea(pathwaysH, ranks)
fgseaRes <- na.omit(fgseaRes)
fgseaResTidy <- fgseaRes %>%
as_tibble() %>%
arrange(desc(NES)) #order by NES
# Combine genes in leadingEdge with semicolons
fgseaResTidy$leadingEdge <- sapply(fgseaResTidy$leadingEdge, paste, collapse = ";")
fgseaResTidy <- fgseaResTidy[order(fgseaResTidy$padj), ]
# Show in a Tidy table.
head(fgseaResTidy)
## # A tibble: 6 × 8
## pathway pval padj log2err ES NES size leadingEdge
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <chr>
## 1 HALLMARK_INTERFERON_GAM… 4.25e-6 1.96e-4 0.611 0.466 1.55 199 CFB;IFIT2;…
## 2 HALLMARK_EPITHELIAL_MES… 9.38e-6 2.16e-4 0.593 0.460 1.53 199 SPP1;CXCL6…
## 3 HALLMARK_COAGULATION 2.67e-5 4.10e-4 0.576 0.486 1.59 129 SERPINA1;H…
## 4 HALLMARK_INTERFERON_ALP… 6.89e-5 7.92e-4 0.538 0.511 1.63 97 IFIT2;IFIT…
## 5 HALLMARK_TNFA_SIGNALING… 1.24e-4 1.14e-3 0.519 0.436 1.45 198 CXCL2;CXCL…
## 6 HALLMARK_OXIDATIVE_PHOS… 2.21e-4 1.69e-3 0.519 0.433 1.44 200 LDHB;MAOB;…
# Save fgseaRes as txt file.
write.table(fgseaResTidy, "~/BHK lab/kevin Project/Cholangiocarcinoma/output data/fgseaRes_HRank.txt", sep = "\t", row.names = FALSE, quote = FALSE)
# Mark "significant" if adjusted p-value <= 0.05, else "non-significant".
fgseaResTidy$adjPvalue <- ifelse(fgseaResTidy$padj <= 0.05, "significant", "non-significant")
# Define colors for significance: grey for non-significant and red for significant results.
cols <- c("non-significant" = "grey", "significant" = "red")
# Using ggplot: reorder pathways by NES, fill bars based on significance, flip coordinates for readability.
ggplot(fgseaResTidy, aes(reorder(pathway, NES), NES, fill = adjPvalue)) + geom_col() +
scale_fill_manual(values = cols) + coord_flip() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
labs(x = "Pathway", y = "Normalized Enrichment Score", title = "Hallmark pathways NES
Enrichment Score from GSEA")
# 4. GSEA Results Table Plot.
# Plot GSEA table for significant pathways (padj < 0.05) using a GSEA parameter of 0.5.
plotGseaTable(pathwaysH[fgseaRes$pathway[fgseaRes$padj < 0.05]], ranks, fgseaRes, gseaParam=0.5)
# 5. Enrichment score plot.
# For significant pathways # 14 pathways.
# Store the list of significant pathways (padj < 0.05).
significant_pathways <- fgseaRes$pathway[fgseaRes$padj < 0.05]
# Use a for loop to iterate and plot enrichment for each significant pathway
for (i in significant_pathways) {
enrichment_plot <- plotEnrichment(pathwaysH[[i]], ranks)
enrichment_plot <- enrichment_plot + ggtitle(paste("Enrichment Plot for Pathway:",i))
print(enrichment_plot)
}
# Plot significant pathways as points with a gradient color based on p-value and size based on a 'size' variable.
ggplot(fgseaResTidy[fgseaResTidy$padj < 0.05, ], aes(y=reorder(pathway, size),x= NES)) + geom_point(aes(color=padj,size=size)) +
scale_color_gradientn(colours = c("red","blue")) +
labs(x='Normalized Enrichment Score', y=NULL ) +
theme( axis.title = element_text(face='bold'), axis.text = element_text (face='bold') )
To do KEGG pathway analysis, both KEGG_MEDICUS and KEGG_LEGACY are downloaded. The cutoff of 0.05 is applied for visualization, while the text files include the results across all gene sets.
186 canonical pathways gene sets derived from the KEGG MEDICUS pathway database.
# Load KEGG_MEDICUS pathways.
pathwaysKEG <- gmtPathways("~/BHK lab/kevin Project/Cholangiocarcinoma/GSEA/c2.cp.kegg_medicus.v2023.2.Hs.symbols.gmt")
# Perform fgsea with KEGG pathways
fgseaRes_kegg <- fgsea(pathwaysKEG, ranks)
# Organize and sort KEGG results by NES
fgseaResTidy_kegg <- fgseaRes_kegg %>%
as_tibble() %>%
arrange(desc(NES))
# Combine genes in leadingEdge with semicolons
fgseaResTidy_kegg$leadingEdge <- sapply(fgseaResTidy_kegg$leadingEdge, paste, collapse = ";")
# Show in a Tidy table of keg.
head(fgseaResTidy_kegg)
## # A tibble: 6 × 8
## pathway pval padj log2err ES NES size leadingEdge
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <chr>
## 1 KEGG_MEDICUS_REFERENCE_… 2.74e-5 0.00283 0.576 0.893 1.92 8 C4A;C4B;C1…
## 2 KEGG_MEDICUS_REFERENCE_… 8.47e-5 0.00582 0.538 0.873 1.88 8 FGA;FGB;FG…
## 3 KEGG_MEDICUS_REFERENCE_… 4.82e-5 0.00426 0.557 0.932 1.86 6 SERPING1;C…
## 4 KEGG_MEDICUS_VARIANT_MU… 1.82e-5 0.00283 0.576 0.618 1.80 45 NDUFB7;NDU…
## 5 KEGG_MEDICUS_VARIANT_MU… 1.85e-5 0.00283 0.576 0.618 1.80 45 NDUFB7;NDU…
## 6 KEGG_MEDICUS_REFERENCE_… 8.20e-6 0.00254 0.593 0.620 1.80 44 NDUFB7;NDU…
# Save fgseaRes_keg as txt file.
write.table(fgseaResTidy_kegg, "~/BHK lab/kevin Project/Cholangiocarcinoma/output data/fgseaRes_kegRank.txt", sep = "\t", row.names = FALSE, quote = FALSE)
# Mark "significant" if adjusted p-value <= 0.05, else "non-significant".
fgseaResTidy_kegg$adjPvalue <- ifelse(fgseaResTidy_kegg$padj <= 0.05, "significant", "non-significant")
# Create a ggplot of significant pathways, with bars colored based on adjusted p-value significance.
ggplot(fgseaResTidy_kegg[fgseaResTidy_kegg$padj < 0.05, ], aes(reorder(pathway, NES), NES, fill = adjPvalue)) +
geom_col() +
scale_fill_manual(values = cols) +
coord_flip() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
labs(x = "Pathway", y = "Normalized Enrichment Score", title = "KEGG pathways NES Enrichment Score from GSEA")
# Plot significant pathways as points with a gradient color based on p-value and size based on a 'size' variable.
ggplot(fgseaResTidy_kegg[fgseaResTidy_kegg$padj < 0.05, ], aes(y=reorder(pathway, size),x= NES)) + geom_point(aes(color=padj,size=size)) +
scale_color_gradientn(colours = c("red","blue")) +
labs(x='Normalized Enrichment Score', y=NULL ) +
theme( axis.title = element_text(face='bold'), axis.text = element_text (face='bold') )
## KEGG_LEGACY
186 canonical pathways gene sets derived from the KEGG pathway database. These are considered Legacy gene sets since the introduction of the gene sets based on the more recent KEGG MEDICUS data.
# Load KEGG_MEDICUS pathways.
pathwaysKEG_LEG <- gmtPathways("~/BHK lab/kevin Project/Cholangiocarcinoma/GSEA/c2.cp.kegg_legacy.v2023.2.Hs.symbols.gmt")
# Perform fgsea with KEGG pathways
fgseaRes_kegg <- fgsea(pathwaysKEG, ranks)
# Organize and sort KEGG results by NES
fgseaResTidy_kegg <- fgseaRes_kegg %>%
as_tibble() %>%
arrange(desc(NES))
# Combine genes in leadingEdge with semicolons
fgseaResTidy_kegg$leadingEdge <- sapply(fgseaResTidy_kegg$leadingEdge, paste, collapse = ";")
# Show in a Tidy table of keg.
head(fgseaResTidy_kegg)
## # A tibble: 6 × 8
## pathway pval padj log2err ES NES size leadingEdge
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <chr>
## 1 KEGG_MEDICUS_REFERENCE_… 2.49e-5 0.00318 0.576 0.893 1.90 8 C4A;C4B;C1…
## 2 KEGG_MEDICUS_REFERENCE_… 5.93e-5 0.00524 0.557 0.932 1.86 6 SERPING1;C…
## 3 KEGG_MEDICUS_REFERENCE_… 6.98e-5 0.00540 0.538 0.873 1.85 8 FGA;FGB;FG…
## 4 KEGG_MEDICUS_VARIANT_MU… 2.82e-5 0.00318 0.576 0.618 1.85 45 NDUFB7;NDU…
## 5 KEGG_MEDICUS_VARIANT_MU… 2.82e-5 0.00318 0.576 0.618 1.84 45 NDUFB7;NDU…
## 6 KEGG_MEDICUS_REFERENCE_… 1.51e-5 0.00318 0.593 0.620 1.84 44 NDUFB7;NDU…
# Save fgseaRes_keg as txt file.
write.table(fgseaResTidy_kegg, "~/BHK lab/kevin Project/Cholangiocarcinoma/output data/fgseaRes_KEGGRank_legacy.txt", sep = "\t", row.names = FALSE, quote = FALSE)
# Mark "significant" if adjusted p-value <= 0.05, else "non-significant".
fgseaResTidy_kegg$adjPvalue <- ifelse(fgseaResTidy_kegg$padj <= 0.05, "significant", "non-significant")
# Create a ggplot of significant pathways, with bars colored based on adjusted p-value significance.
ggplot(fgseaResTidy_kegg[fgseaResTidy_kegg$padj < 0.05, ], aes(reorder(pathway, NES), NES, fill = adjPvalue)) +
geom_col() +
scale_fill_manual(values = cols) +
coord_flip() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
labs(x = "Pathway", y = "Normalized Enrichment Score", title = "KEGG (legacy) pathways NES Enrichment Score from GSEA")
# Plot significant pathways as points with a gradient color based on p-value and size based on a 'size' variable.
ggplot(fgseaResTidy_kegg[fgseaResTidy_kegg$padj < 0.05, ], aes(y=reorder(pathway, size),x= NES)) + geom_point(aes(color=padj,size=size)) +
scale_color_gradientn(colours = c("red","blue")) +
labs(x='Normalized Enrichment Score', y=NULL ) +
theme( axis.title = element_text(face='bold'), axis.text = element_text (face='bold') )
To identifies whether certain gene sets are over-represented in a target gene group.
# Set a vector of differentially expressed genes for ORA
deGenes <- rownames(top.table)[top.table$adj.P.Val < 0.05] # 460 genes
# Defining the gene universe as all genes considered in the analysis
geneUniverse <- rownames(expr_tpm)
## Performing ORA
# ORA for Hallmark pathways
oraHallmark <- fora(pathways = pathwaysH, genes = deGenes, universe = geneUniverse, minSize = 15, maxSize = 500)
oraHallmark$Pathway_Set <- "Hallmark"
oraHallmark$Significance <- ifelse(oraHallmark$padj < 0.05, "Significant", "Not Significant")
# ORA for KEGG_MEDICUS pathways
oraKeggMed <- fora(pathways = pathwaysKEG, genes = deGenes, universe = geneUniverse, minSize = 15, maxSize = 500)
oraKeggMed$Pathway_Set <- "KEGG_MEDICUS"
oraKeggMed$Significance <- ifelse(oraKeggMed$padj < 0.05, "Significant", "Not Significant")
# ORA for KEGG_LEGACY pathways
oraKeggLeg <- fora(pathways = pathwaysKEG_LEG, genes = deGenes, universe = geneUniverse, minSize = 15, maxSize = 500)
oraKeggLeg$Pathway_Set <- "KEGG_LEGACY"
oraKeggLeg$Significance <- ifelse(oraKeggLeg$padj < 0.05, "Significant", "Not Significant")
# Display the top results for each pathway set
# DT::datatable(head(oraHallmark,
# options = list(scrollX = TRUE, scrollY = TRUE, pageLength = 4, autoWidth = TRUE))
DT::datatable(oraHallmark)
DT::datatable(oraKeggMed)
DT::datatable(oraKeggLeg)
# Save the data frame as an RDS file
saveRDS(oraHallmark, file = "~/BHK lab/kevin Project/Cholangiocarcinoma/output data/oraHallmark.rds")
saveRDS(oraKeggMed, file = "~/BHK lab/kevin Project/Cholangiocarcinoma/output data/oraKeggMed.rds")
saveRDS(oraKeggLeg, file = "~/BHK lab/kevin Project/Cholangiocarcinoma/output data/oraKeggLeg.rds")
# oraHallmark
# Create an enrichment heatmap
ggplot(oraHallmark, aes(x = pathway, y = reorder(pathway, padj), fill = -log10(padj))) +
geom_tile() +
scale_fill_gradientn(colors = c("blue", "yellow", "red"), na.value = "grey50", trans = "reverse") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(fill = "-log10(p-adj)", x = "Pathway", y = "") +
ggtitle("Enrichment Heatmap of Top Pathways")
#ORA Kegg
# Create an enrichment heatmap
ggplot(oraKeggLeg[1:50, 1:8], aes(x = pathway, y = reorder(pathway, padj), fill = -log10(padj))) +
geom_tile() +
scale_fill_gradientn(colors = c("blue", "yellow", "red"), na.value = "grey50", trans = "reverse") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(fill = "-log10(p-adj)", x = "Pathway", y = "") +
ggtitle("Enrichment Heatmap of Top Pathways")
# Create an enrichment heatmap
ggplot(oraKeggMed[1:50, 1:8], aes(x = pathway, y = reorder(pathway, padj), fill = -log10(padj))) +
geom_tile() +
scale_fill_gradientn(colors = c("blue", "yellow", "red"), na.value = "grey50", trans = "reverse") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(fill = "-log10(p-adj)", x = "Pathway", y = "") +
ggtitle("Enrichment Heatmap of Top Pathways")
# Create example data
set1 <- oraHallmark$pathway
set2 <- oraKeggMed$pathway
set3 <- oraKeggLeg$pathway
# Create a list of sets
sets_list <- list(
oraHallmark = set1,
oraKeggMed = set2,
oraKeggLeg = set3
)
# Create an UpSet plot
upset(fromList(sets_list), main.bar.color = "lightblue", sets.bar.color = "lightblue")